Now it is time to read in the data and get into a workable format for our problem
# read in the data
df <- read.csv('Data/NEISS/sport_category_final.csv')
# create a contigency table
orig_table <- table(df$prod1,df$body_part)
# create a datafame from the contigency table (at least one that is intuitive)
tm<-as.data.frame.matrix(orig_table) # tm stands for table matrix
and some further cleanup. This time, we need to ensure that all of the classifications of the original data belong to the same categories of that in the Berger and Trinkaus paper.
head_neck <- tm$face + tm$neck + tm$head
shoulder_arm <- tm$`upper arm` + tm$`lower arm` + tm$shoulder + tm$elbow
hand <- tm$hand + tm$finger + tm$wrist
pelvis <- tm$`pubic region` + tm$hip
leg <- tm$knee + tm$`lower leg` + tm$`upper leg`
foot <- tm$foot + tm$toe + tm$ankle
trunk <- tm$`upper trunk` + tm$back
Now let’s wrap this all into a final dataframe that we will use throughout the rest of this analysis.
final<-as.data.frame(cbind(head_neck,shoulder_arm,hand,pelvis,leg,foot,trunk))
rownames(final)<-rownames(tm) # index will be the activity name
Grabbing the values off of the Berger and Trinkaus paper is the first task
sample1<-c(8,4,7,1,1,3,3) # total sample
sample2<-c(7,4,7,1,0,3,1) # without djd
sample3<-c(6,4,5,1,1,3,1) # without shandidar 1
sample4<-c(5,4,5,1,0,3,0) # without djd or shandidar 1
Now it is time to run the actual analysis. We are going to create some tables the involve running a chi square test on all of the samples against all of the activities as well as calculating Cramer’s V.
# total neanderthal sample
neander_total_stat_table<-t(apply(final,1,function(x) {
two_rows = cbind(sample1,x) # compare each row with neanderthal sample
cramer_stat = CramerV(two_rows,conf.level=0.90) # perform cramer's v and find confidence intervals
chi2test = chisq.test(two_rows) # perform chi square
chi2_cols = c(unname(chi2test$statistic), chi2test$p.value) # grab chi square and p-value
frame = cbind(cramer_stat,chi2_cols) # bind cramer's v, conf intervals, chi square and p-value together
final_frame = frame[-c(6)] # drop stupid extra chi square column
names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value") # name columns
return(final_frame) # return frame
}))
final2<-final[-c(6,9),]
neander_wo_djd_stat_table<-t(apply(final2,1,function(x) {
two_rows = cbind(sample2,x)
cramer_stat = CramerV(two_rows,conf.level=0.90)
chi2test = chisq.test(two_rows)
chi2_cols = c(unname(chi2test$statistic), chi2test$p.value)
frame = cbind(cramer_stat,chi2_cols)
final_frame = frame[-c(6)]
names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
return(final_frame)
}))
# sample without shandidar
neander_wo_shan_stat_table<-t(apply(final,1,function(x) {
two_rows = cbind(sample3,x)
cramer_stat = CramerV(two_rows,conf.level=0.90)
chi2test = chisq.test(two_rows)
chi2_cols = c(unname(chi2test$statistic), chi2test$p.value)
frame = cbind(cramer_stat,chi2_cols)
final_frame = frame[-c(6)]
names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
return(final_frame)
}))
# sample without shandidar or djd
final3<-final[-c(5,6,9,14,26,54,70),]
neander_wo_djd_shan_stat_table<-t(apply(final3,1,function(x) {
two_rows = cbind(sample4,x)
cramer_stat = CramerV(two_rows,conf.level=0.90)
chi2test = chisq.test(two_rows)
chi2_cols = c(unname(chi2test$statistic), chi2test$p.value)
frame = cbind(cramer_stat,chi2_cols)
final_frame = frame[-c(6)]
names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
return(final_frame)
}))
Create a function to clean up some vital information about the Chi Square test
chi2cleanup<-function(table){
# read in one of the chi square tables (neander_total_stat_table, neander_wo_djd_stat_table, neander_wo_shan_stat_table or neander_wo_djd_shan_stat_table)
frame<-as.data.frame(table,row.names = rownames(table))
names(frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
fin = na.omit(frame)
# create two callable objects, 1) the rows that have NAs and 2) the final data frame for some final manipulation
c<- list(
rowna=frame[is.nan(frame$`chi square`),], # $rowna
final=fin, # $final
similar=fin[fin$`p-value` > .05,] # $similar (activities that are similar, P > 0.05)
)
return(c)
}
# name cleaned up chi square tables
n_tot<-chi2cleanup(neander_total_stat_table) # neanderthal total
n_djd<-chi2cleanup(neander_wo_djd_stat_table) # neanderthal w/o djd
n_s<-chi2cleanup(neander_wo_shan_stat_table) # neanderthal w/o shan
n_sd<-chi2cleanup(neander_wo_djd_shan_stat_table) # neanderthal w/o shan or djd
Find similar activities per sample
n_tot$similar # neanderthal total
## cramer's v l.ci u.ci chi square p-value
## altar 0.2293476 0 0.2795863 3.576823 0.73372229
## banisters 0.4265850 0 0.5519008 11.100461 0.08532075
## benches 0.2781826 0 0.3621662 8.976722 0.17489146
## bleachers 0.2658938 0 0.3441546 10.958421 0.08967001
## diving board 0.2616941 0 0.3413494 7.396248 0.28575071
## flying discs or boomerangs 0.2944192 0 0.3805758 11.355433 0.07799320
## golf 0.2181053 0 0.2815384 11.749776 0.06778954
## golf carts 0.2179176 0 0.2829501 9.972497 0.12581503
## lawn chair 0.3791534 0 0.4912861 10.638039 0.10022845
## loading docks 0.4014090 0 0.5178498 11.923561 0.06369594
## poles 0.3553561 0 0.4597138 11.112463 0.08496219
## roller hockey 0.4022535 0 0.5229394 9.546665 0.14508720
## snow tubing 0.3946921 0 0.5112343 10.748946 0.09645243
## swimsuit 0.2165592 0 0.2824357 7.175379 0.30493252
## water tubing 0.1667062 0 0.1911635 2.973634 0.81214870
## windsurfing 0.2221475 0 0.2859673 12.534779 0.05104817
n_djd$similar # neanderthal w/o djd
## cramer's v l.ci u.ci chi square
## altar 0.27047673 0 0.34528330 4.682090
## baseball 0.04624864 0 0.06030432 6.979352
## benches 0.29226244 0 0.37993459 9.566742
## bleachers 0.27244015 0 0.35234500 11.207769
## diving board 0.32441635 0 0.41993277 10.945581
## flying discs or boomerangs 0.30639724 0 0.39527697 11.922667
## golf carts 0.24203273 0 0.31208125 12.067448
## paddle ball 0.36939368 0 0.47797835 11.052587
## poles 0.31902142 0 0.41570805 8.549072
## roller hockey 0.38828687 0 0.50618416 8.292168
## swimsuit 0.25182983 0 0.32746768 9.449321
## water tubing 0.23732547 0 0.30787081 5.801308
## p-value
## altar 0.58518450
## baseball 0.32276095
## benches 0.14412345
## bleachers 0.08216346
## diving board 0.09007295
## flying discs or boomerangs 0.06371642
## golf carts 0.06048094
## paddle ball 0.08676471
## poles 0.20057074
## roller hockey 0.21747021
## swimsuit 0.14984055
## water tubing 0.44581190
n_s$similar # neanderthal w/o shan
## cramer's v l.ci u.ci chi square
## altar 0.17836929 0 0.13613673 1.972567
## banisters 0.45072369 0 0.58298172 11.173352
## baseball 0.04605026 0 0.06003702 6.915363
## benches 0.23624442 0 0.30717896 6.139257
## bleachers 0.19842399 0 0.25753648 5.866440
## cheerleading 0.17192926 0 0.22222508 11.380472
## diving board 0.26433150 0 0.34472777 7.126857
## flying discs or boomerangs 0.23830238 0 0.31077076 7.098503
## golf 0.17622636 0 0.22986673 7.484431
## golf carts 0.18065056 0 0.23538386 6.657463
## lawn chair 0.39038559 0 0.50628131 10.363262
## loading docks 0.35299802 0 0.46004815 8.473317
## mountain/all-terrain biking 0.21713915 0 0.28141832 10.561468
## paddle ball 0.35178829 0 0.45703598 9.776645
## patios/flooring 0.23899324 0 0.31070827 9.538668
## poles 0.29146887 0 0.38003710 6.966236
## roller hockey 0.33356498 0 0.43303142 5.897077
## snow tubing 0.38226806 0 0.49740629 9.206119
## swimming pools (not specified) 0.23288837 0 0.30248709 9.871133
## swimsuit 0.21242265 0 0.27676231 6.633137
## water tubing 0.16976590 0 0.19281992 2.910867
## windsurfing 0.19929201 0 0.25885982 9.849892
## p-value
## altar 0.92220419
## banisters 0.08316429
## baseball 0.32874544
## benches 0.40777277
## bleachers 0.43831681
## cheerleading 0.07730557
## diving board 0.30927482
## flying discs or boomerangs 0.31183373
## golf 0.27835805
## golf carts 0.35368900
## lawn chair 0.11016446
## loading docks 0.20543589
## mountain/all-terrain biking 0.10291280
## paddle ball 0.13437860
## patios/flooring 0.14547269
## poles 0.32398093
## roller hockey 0.43481796
## snow tubing 0.16231362
## swimming pools (not specified) 0.13018486
## swimsuit 0.35610991
## water tubing 0.81994803
## windsurfing 0.13111741
n_sd$similar # neanderthal w/o shan or djd
## cramer's v l.ci u.ci chi square
## altar 0.27044152 0 0.34172934 4.315178
## baseball 0.04358653 0 0.05668606 6.189503
## benches 0.27497775 0 0.35856059 8.090566
## bleachers 0.23346681 0 0.30446467 7.957986
## cheerleading 0.17005634 0 0.22004504 11.047118
## diving board 0.34121391 0 0.44080629 11.526266
## flying discs or boomerangs 0.27735819 0 0.36072865 9.385163
## golf 0.20341819 0 0.26422887 9.848193
## golf carts 0.21722266 0 0.28244575 9.484323
## lawn chair 0.42649705 0 0.55041710 11.823483
## loading docks 0.40086369 0 0.51973802 10.444960
## mountain/all-terrain biking 0.23074082 0 0.29783784 11.766333
## paddle ball 0.36197358 0 0.47001334 9.957890
## patios/flooring 0.24713935 0 0.32085759 10.016769
## poles 0.30452592 0 0.39721855 7.326147
## snow tubing 0.45335991 0 0.58401200 12.332112
## swimsuit 0.26351380 0 0.34212870 9.999291
## water tubing 0.25344844 0 0.32981028 6.295139
## p-value
## altar 0.63410647
## baseball 0.40230034
## benches 0.23154305
## bleachers 0.24119759
## cheerleading 0.08693106
## diving board 0.07341106
## flying discs or boomerangs 0.15304725
## golf 0.13119225
## golf carts 0.14811598
## lawn chair 0.06602455
## loading docks 0.10712155
## mountain/all-terrain biking 0.06738931
## paddle ball 0.12643658
## patios/flooring 0.12394763
## poles 0.29173540
## snow tubing 0.05495688
## swimsuit 0.12468188
## water tubing 0.39095341
Lets look at what Rodeo Riders look like in comparison to the Neanderthal samples
n_tot$final['rodeo',] # neanderthal total
## cramer's v l.ci u.ci chi square p-value
## rodeo 0.2198114 0.113429 0.2732467 23.62703 0.000611504
n_djd$final['rodeo',] # neanderthal w/o djd
## cramer's v l.ci u.ci chi square p-value
## rodeo 0.2141958 0.1057345 0.2672423 22.25172 0.001090031
n_s$final['rodeo',] # neanderthal w/o shan
## cramer's v l.ci u.ci chi square p-value
## rodeo 0.1858431 0.06622844 0.2358073 16.68168 0.01052714
n_sd$final['rodeo',] # neanderthal w/o shan or djd
## cramer's v l.ci u.ci chi square p-value
## rodeo 0.1850306 0.06424588 0.2349721 16.43343 0.01160729
similarSelector<-function(frame,final_frame,neander_sample){
require(reshape)
require(plyr)
indices=rownames(frame$similar) # find rows to set as indices
new_rows = final_frame[indices,] # map those to original contigency table
sample =append(c("neander","rodeo"),rownames(new_rows)) # add a new vector to use a "sample" column
rodeo = final_frame['rodeo',] # add rodeo riders
joined_rows = rbind(neander_sample,rodeo,new_rows) # join rows from neanderthal sample to new dataframe
props = prop.table(as.table(as.matrix(joined_rows)),1)
props<-as.data.frame.matrix(props)
joined_cols = cbind(sample,props) # add the "sample" column
rownames(joined_cols) = NULL # remove the row indices
melted <- melt(joined_cols, id=(c("sample"))) # transpose contigency table
return(melted)
}
# an example of how similarSelector works
simToNeanderTotal<-similarSelector(n_tot,final,sample1)
simToNeanderWOdjd<-similarSelector(n_djd,final,sample2)
simToNeanderWOShan<-similarSelector(n_s,final,sample3)
simToNeanderWOdjdOrShan<-similarSelector(n_sd,final,sample4)
Find jus the neanderthals in the above dataframes
# extract just the neanderthal row for emphasis in plots
n2<-simToNeanderTotal[simToNeanderTotal$sample=='neander',]
d2<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='neander',]
s2<-simToNeanderWOShan[simToNeanderWOShan$sample=='neander',]
sd<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='neander',]
# extract just the rodeo riders for emphasis in plots
n3<-simToNeanderTotal[simToNeanderTotal$sample=='rodeo',]
d3<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='rodeo',]
s3<-simToNeanderWOShan[simToNeanderWOShan$sample=='rodeo',]
sd3<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='rodeo',]
Plot those that are similar
plotMaker<-function(total_frame,neander_frame,rodeo_frame){
ggplot(data=total_frame, # this needs to be one of the dataframes directly above
aes(x=factor(variable), y=value,
group=sample,
color=sample)) +
geom_line() +
geom_point() +
geom_point(data=neander_frame,aes(x=factor(variable), y=value,
group=sample, size = 4))+
geom_line(data=neander_frame,aes(x=factor(variable), y=value,
group=sample, size = 2))+
geom_point(data=rodeo_frame,aes(x=factor(variable), y=value,
group=sample, size = 4))+
geom_line(data=rodeo_frame,aes(x=factor(variable), y=value,
group=sample, size = 2))+
scale_x_discrete("Proportion") +
scale_y_continuous("Body Part")+
guides(size=FALSE)
}
The total sample
plotMaker(simToNeanderTotal,n2,n3)
The sample with out djd
plotMaker(simToNeanderWOdjd,d2,d3)
The sample without Shandidar
plotMaker(simToNeanderWOShan,s2,s3)
The sample without djd or Shan
plotMaker(simToNeanderWOdjdOrShan,sd,sd3)